#!/usr/bin/env bash
# Launch `vllm serve` for the local Qwen2.5-32B-Instruct checkpoint, sharded
# across four GPUs with tensor parallelism.

# Very generous timeouts. The first is in seconds (per its _S suffix):
# 36000 s = 10 h. VLLM_RPC_TIMEOUT is documented by vLLM as milliseconds
# (36000000 ms = 10 h) -- NOTE(review): confirm against the installed version.
export VLLM_ENGINE_ITERATION_TIMEOUT_S=36000
export VLLM_RPC_TIMEOUT=36000000

# Restrict the process to four devices; the count must match `-tp 4` below.
export CUDA_VISIBLE_DEVICES=12,13,14,15

# 32 Ki tokens of context. Uses POSIX $(( )) arithmetic rather than the
# obsolete bash-only $[ ] form, which other shells pass through unexpanded.
max_model_len=$((32 * 1024))

# --host 0.0.0.0 makes the server listen on every network interface.
vllm serve /data1/mzn/model/Qwen/Qwen2___5-32B-Instruct \
  -tp 4 \
  --max-model-len "${max_model_len}" \
  --guided-decoding-backend xgrammar \
  --trust-remote-code \
  --host 0.0.0.0 \
  --port 8000